#!/usr/bin/env python
# encoding: utf-8
"""
@author: youfeng
@email: youfeng243@163.com
@license: Apache Licence
@file: enterprise_clean.py
@time: 2018/2/5 11:10
"""
from common.mongo import MongDb
from logger import Logger

# Connection settings for the `app_data` database; the values are handed to
# MongDb when the module-level `source_db` handle is created.
# NOTE(review): host and credentials are hard-coded in source -- consider
# loading them from the environment or an untracked config file.
APP_DATA_CONF = dict(
    host='172.16.215.16',
    port=40042,
    db='app_data',
    username='work',
    password='haizhi',
)

# Logging module: the script-wide logger, obtained from the project's Logger
# helper (presumably backed by the file 'enterprise_clean.log' -- confirm
# against the Logger implementation).
log = Logger('enterprise_clean.log').get_logger()

# Handle on the source database, built from APP_DATA_CONF and sharing the
# logger created above.
source_db = MongDb(
    APP_DATA_CONF['host'],
    APP_DATA_CONF['port'],
    APP_DATA_CONF['db'],
    APP_DATA_CONF['username'],
    APP_DATA_CONF['password'],
    log=log
)

# Two-digit region code -> province-level region name, used by get_province()
# to translate the prefix extracted from a company code.
# The codes look like the GB/T 2260 province-level prefixes -- confirm against
# the standard before extending the table.
# Entries are listed in ascending code order, grouped by leading digit.
PROVINCE_REGISTER_CODE = {
    # 1x
    '11': u'北京',
    '12': u'天津',
    '13': u'河北',
    '14': u'山西',
    '15': u'内蒙古',
    # 2x
    '21': u'辽宁',
    '22': u'吉林',
    '23': u'黑龙江',
    # 3x
    '31': u'上海',
    '32': u'江苏',
    '33': u'浙江',
    '34': u'安徽',
    '35': u'福建',
    '36': u'江西',
    '37': u'山东',
    # 4x
    '41': u'河南',
    '42': u'湖北',
    '43': u'湖南',
    '44': u'广东',
    '45': u'广西',
    '46': u'海南',
    # 5x
    '50': u'重庆',
    '51': u'四川',
    '52': u'贵州',
    '53': u'云南',
    '54': u'西藏',
    # 6x
    '61': u'陕西',
    '62': u'甘肃',
    '63': u'青海',
    '64': u'宁夏',
    '65': u'新疆',
}


def get_province(unified_social_credit_code):
    """Return the province name encoded in a company code, or None.

    :param unified_social_credit_code: the code string taken from a record.
        Anything that is not a string yields None.
    :return: the PROVINCE_REGISTER_CODE entry for the two-character region
        prefix found in the code, or None when the prefix is not in the table.
    """
    if not isinstance(unified_social_credit_code, basestring):
        return None

    # An 18-character value carries the region prefix at offsets 2-3; a value
    # of any other length is read from its first two characters.
    offset = 2 if len(unified_social_credit_code) == 18 else 0
    prefix = unified_social_credit_code[offset:offset + 2]

    # dict.get yields None for an unknown (or too-short) prefix.
    return PROVINCE_REGISTER_CODE.get(prefix)


def main():
    """Report records whose stored province looks inconsistent.

    Walks the 'enterprise_data_gov_new_2018_01_08' table through
    source_db.traverse_batch_field and, for each record:

    * logs an error and skips it when 'company' is missing or 'province' is
      not a string;
    * silently skips it when 'city' is not a string;
    * skips it when the province derived from 'unified_social_credit_code'
      equals the stored province, or when the stored province occurs inside
      the city string;
    * otherwise logs the company, both provinces and the city at info level.
    """
    source_table = 'enterprise_data_gov_new_2018_01_08'
    wanted_fields = ['province', 'city', 'company', 'unified_social_credit_code']

    for record in source_db.traverse_batch_field(source_table, {}, wanted_fields):
        name = record.get('company')
        if name is None:
            log.error("没有企业信息 ...")
            continue

        stored_province = record.get('province')
        if not isinstance(stored_province, basestring):
            log.error("没有省份信息: company = {}".format(name))
            continue

        city = record.get('city')
        if not isinstance(city, basestring):
            # A record without a usable city is dropped without logging.
            continue

        derived_province = get_province(record.get('unified_social_credit_code'))

        # Nothing to report when the code agrees with the stored province, or
        # when the stored province is contained in the city string.
        if derived_province == stored_province or stored_province in city:
            continue

        log.info("company = {} province = {} cal_province = {} city = {}".format(
            name, stored_province, derived_province, city))

# Run the province consistency check only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
